*********************************************************************************
/*    Create waterfall chart unfolding total cost and revenue per audit	 		*/
*********************************************************************************

**** From metafile.do XX

* Paper or Slides formatting 
global version paper // XX paper or slides

* Personal file path inputs 
global user "C:/Users/chh7970" // XX
global dropbox_name "Charlie Hutchinson" // XX

if "${version}"=="paper" global graphpath "${mvpf_audits}/Paper/figures/label" 
if "${version}"=="slides" global graphpath "${mvpf_audits}/Figures/${version}"

* years to keep
global min_yr = 2010
global max_yr = 2014
global multiplier_yr = 2003 // base year for projection and discounting

* GS payscale step
global step = 5

* Income definition
global inc = "tpi" // Takes values "agi" and "tpi"

*formatting 
global tc = "gs4"
global rc_col= "128 67 128" // tasteful eggplant

* Set color scheme (high contrast use for plots with many series)
set scheme opp_insights_fb

**** End of code from metafile.do // XXX
**** Start of code from waterfall_cost_rev_future_disc.do // XXX

* Set directories 

global dropbox "${user}/Opportunity Insights Dropbox"
global oi_home "${dropbox}/Opportunity Insights Shared Workspace"
global dropbox_local "${dropbox}/${dropbox_name}"
global mvpf_audits "${dropbox_local}/MVPF-Tax-Audits" 

adopath + "${oi_home}/Research files/ado"

global mvpf_audits "/Users/chh7970/Opportunity Insights Dropbox/Charlie Hutchinson/MVPF-Tax-Audits" // XXX

* File paths
local datapath "${mvpf_audits}/data/cleaned" 
local graphpath "${graphpath}" 

* Import data 
use "`datapath'/audit_tab_prim_inc_${min_yr}_${max_yr}.dta", clear // XXX

* Waterfall chart to produce 
*local useloop = `1' // =1 for operational, =2 corr, =3 both
local useloop = 1 // =1 for operational, =2 corr, =3 both // XXX DELETE/CHANGE, shouldn't be hardcoded in - ellen

* Groups of variables
#delimit ;
local rev_vars = "rev_tot rev_appl rev_cnsl rev_exam rev_tot_future 
				  rev_future rev_coll_notice rev_coll rev_tot_disc discount
				  rev_coll_notice  rev_coll_acs  rev_coll_queue  rev_coll_field "
				  ;
			
local cost_vars = "costs_coll costs_exam costs_appl costs_tot
				labor_costs_exam${step} labor_costs_appl${step} labor_costs_coll
				labor_costs_cnsl${step} oh_costs_exam oh_costs_appl oh_costs_coll
				costs_notices costs_acs
				costs_labor costs_oh
				costs_tot_future costs_future" 
				;
#delimit cr 

* Variables of interest 
local vars `cost_vars' `rev_vars'

* Version of fig to produce 
global version ${version} // paper or slides

* Other formatting
if "${version}" == "slides" {
	global title "title(" ", size(vhuge))"
	global img wmf
	graph set window fontface "Arial"
	global title "title(" ", size(vhuge))"
	global axis_size "" //xsize(6.5) ysize(4)" // makes figure wider
}

else if "${version}" == "paper" {
	global title "title("")"
	global img pdf
	graph set window fontface "LMRoman10-Regular"
	global title " "
	global axis_size " " // makes figure wider
}



* Ops waterfall
	if `useloop'== 1 { 
		local hetlist `""if exam_type_pooled==1" "'
		local hettexts `" "Operational" "'
		local hetlabs `""ops" "'
	}

* Corr waterfall
	if `useloop'== 2 { 
		local hetlist `""if exam_type_pooled==3" "'
		local hettexts `" "Correspondence" "'
		local hetlabs `""corr" "'
	}
	
* create waterfall for all audits
	if `useloop'== 3 { 
		local hetlist `""if exam_type_pooled==1" "if exam_type_pooled==3""'
		local hettexts `" "Operational" "Correspondence""'
		local hetlabs `""ops" "corr""'
	}	
	

/*------------------------------------------------------------------------------
Create waterfall 
------------------------------------------------------------------------------*/

local j=1
foreach h in `hetlist' {
	local hetlab: word `j' of `hetlabs'
	local hettext: word `j' of `hettexts'
	di "`h'" "`hetlab'"	
		
	forval tpibin = 0(1)22{ // XXX 
	preserve
	
	    keep if tpi_pctl == `tpibin' // XXX
					
		* Store total cost components as locals
		foreach var in  `vars' {	
			qui summ `var' `h'
			local `var'_`hetlab' = r(mean)
		}
			
		* Store R/C for waterfall chart
		local rc : di %4.2f `rev_tot_disc_`hetlab''/`costs_tot_future_`hetlab''
			
		** Set fig range (y-axis) for each waterfall (ops and corr)
		if "`hetlab'"== "ops" {
		
		local ub_rev =  round(`rev_tot_`hetlab''*1.13, 100) // XXX CHANGE?
		
		if `ub_rev' <= 12000 {
			local lb = 0
			local ub = 10000 // round(`rev_tot_`hetlab''*1.13, 10) 
			local int = 2000 //(`ub' - `lb')/5
		}
		if `ub_rev' <= 20000 & `ub_rev' >= 12000 {
			local lb = 0
			local ub = 20000 // round(`rev_tot_`hetlab''*1.13, 10) 
			local int = 5000 //(`ub' - `lb')/5
		}
		if `ub_rev' <= 40000 & `ub_rev' >= 20000 {
			local lb = 0
			local ub = 40000 // round(`rev_tot_`hetlab''*1.13, 10) 
			local int = 10000 //(`ub' - `lb')/5
		}
		if `ub_rev' >= 80000 {
			local lb = 0
			local ub = 120000 // round(`rev_tot_`hetlab''*1.13, 10) 
			local int = 20000 //(`ub' - `lb')/5
		}
		
		*local ub_cost =  round(`rev_tot_`hetlab''*1.13, 100) // XXX CHANGE?	
		*local ub = max(`ub_rev',`ub_cost') // XXX CHANGE?			
		*local int = (`ub' - `lb')/5	// XXX CHANGE?	
		
		}
			
		if "`hetlab'"== "corr" {
		local lb = 0
		local ub = 1200 // round(`rev_tot_`hetlab''*1.13, 10) 
		local int = 300 //(`ub' - `lb')/5
		}

		* Create waterfall 
		clear all
 
		* Manual inputs
		set obs 100
			
		g xaxis = _n
		replace xaxis = 0.99 in 1

		local max_obs = 12 // Total number of components 
		local white_out_first_cost = 2 // Observation first white out bar appears 
		local white_out_last_cost = 4 // Observation last white out bar appears 
		local xaxis_total_cost = 5
			
		local white_out_first = 8 // Observation first white out bar appears 
		local white_out_last = 11 // Observation last white out bar appears 
		local xaxis_total_rev = 12

		drop if xaxis>`max_obs' 
			
		* set y-axis label and figure name
		local ylab = "Dollars per Audit ($)"
		local fig_name = "waterfall_costs_rev"
			
		* y coordinate for text
		local y = (7/8)*`ub'
			
		** Cost Data Inputs 
		g cost = `costs_exam_`hetlab'' in 1 			// Exams
			replace cost =  `costs_appl_`hetlab'' in 2 // Appeals
			replace cost =  `costs_coll_`hetlab'' in 3 // Collections
			replace cost =  `costs_future_`hetlab'' in 4 // Future costs
			replace cost =  `costs_tot_future_`hetlab'' in 5 // Total cost
			replace cost =  . in 6
				
		g cost2=cost
		g white_out = cost2 if xaxis<6

		forval i = `white_out_first_cost'/`white_out_last_cost' {
			replace white_out = cost2[`i'-1]  in `i' if cost>0
			replace cost2 = cost2[`i'-1] + cost  in `i' if cost>0
		}
		
		* Generate variable for labor and oh costs (exam and appeals)
		g costs_labor = `labor_costs_exam${step}_`hetlab'' in 1 
			replace costs_labor = `labor_costs_appl${step}_`hetlab'' + `labor_costs_cnsl${step}_`hetlab'' in 2 
			replace costs_labor = `labor_costs_coll_`hetlab'' in 3 
				
		g costs_oh = `oh_costs_exam_`hetlab'' in 1 
			replace costs_oh = `oh_costs_appl_`hetlab'' in 2
			replace costs_oh = `oh_costs_coll_`hetlab'' + `costs_notices_`hetlab'' + `costs_acs_`hetlab'' in 3 // oh and non-direct labor costs coll
			
		* Exam overhead multipliers breakdown 
		if "`hetlab'"== "ops" {
		    local m1 = 1.9279
			local m2 = 1.0054
			local m3 = 1.4521  
			
			local scale = 650
		}
		
		if "`hetlab'"== "corr" {
		    local m1 = 6.5718
			local m2 = 3.4308
			local m3 = 6.0677
			
			local scale = 10
		}
		
			g oh_m1 = `labor_costs_exam${step}_`hetlab''*`m1' + ///
				  `labor_costs_exam${step}_`hetlab'' 	in 1	 // primary costs
			g oh_m2 = `labor_costs_exam${step}_`hetlab''*`m2' + oh_m1 in 1		 // secondary - BU
			g oh_m3 = `labor_costs_exam${step}_`hetlab''*`m3' + oh_m2 in 1		// BU
		
			
		replace white_out = . in 1
			
		* Generate darker bar for labor costs
		g costs_labor2 = `labor_costs_exam${step}_`hetlab'' in 1 
			replace costs_labor2 = cost2[1] + `labor_costs_appl${step}_`hetlab'' + `labor_costs_cnsl${step}_`hetlab'' in 2 
			replace costs_labor2 = cost2[2] + `labor_costs_coll_`hetlab'' in 3 
			
		* save oh components as labels 
		foreach var in costs_labor oh_m1 oh_m2 oh_m3 {
			sum `var' in 1
			local `var' =  r(mean) - `scale'
		}
		
		local costs_labor_lab: di %4.0fc `labor_costs_exam${step}_`hetlab''
		local oh_m1_lab: di %4.0fc `labor_costs_exam${step}_`hetlab''*`m1'
		local oh_m2_lab: di %4.0fc `labor_costs_exam${step}_`hetlab''*`m2'
		local oh_m3_lab: di %4.0fc `labor_costs_exam${step}_`hetlab''*`m3'

		
		* Labels to plot on top of bars
		g label_cost = "$"+ string(cost, "%9.0fc")   if cost>=0
		g label_cost_labor = "$"+ string(costs_labor, "%9.0fc")   
		g label_cost_oh = "$"+ string(costs_oh, "%9.0fc")  
			
		** Revenue data inputs
		g rev = `rev_exam_`hetlab'' in 7 // Exams
			replace rev =  `rev_cnsl_`hetlab'' + `rev_appl_`hetlab'' in 8 // Appeals and Counsel
			replace rev =  `rev_coll_`hetlab'' in 9 //  Collections
			replace rev =  `rev_future_`hetlab'' in 10 //  Future revenues
			replace rev =  `discount_`hetlab'' in 11 // Total revenues
			replace rev =  `rev_tot_disc_`hetlab'' in 12 // Total revenues (discounted)
		g rev2= rev
		replace white_out = rev2 if xaxis>6

		forval i = `white_out_first'/`white_out_last' {
			replace white_out = rev2[`i'-1]  in `i' if rev>0
			replace rev2 = rev2[`i'-1] + rev  in `i' if rev>0
			
			* if revenue component negative
			replace white_out = rev2[`i'-1] + rev  in `i' if rev<0
			replace rev2 = rev2[`i'-1]   in `i' if rev<0
		}
		
		* Collections revenue components
		g rev_exam_appl = `rev_exam_`hetlab'' + `rev_cnsl_`hetlab'' + `rev_appl_`hetlab''
		g rev_coll1 = `rev_coll_acs_`hetlab'' + rev_exam_appl 	in 9	 // ACS
		g rev_coll2 = `rev_coll_queue_`hetlab'' + rev_coll1 in 9		 // Queue
		g rev_coll3 = `rev_coll_field_`hetlab'' + rev_coll2 in 9		// field
		g rev_coll4 = `rev_coll_notice_`hetlab'' + rev_coll3 in 9		// field
		
		* save collections components as labels 
		foreach var in rev_coll1 rev_coll2 rev_coll3 rev_coll4 {
			sum `var' in 9
			local `var' =  r(mean) - `scale'
		}

		local rev_coll1_lab: di %4.0fc `rev_coll_acs_`hetlab''
		local rev_coll2_lab: di %4.0fc `rev_coll_queue_`hetlab''
		local rev_coll3_lab: di %4.0fc `rev_coll_field_`hetlab''
		local rev_coll4_lab: di %4.0fc `rev_coll_notice_`hetlab''
*/
		* Labels to appear on top of bars
		g label_rev = "$"+ string(rev, "%9.0fc")   if rev>=0
		replace label_rev = "-$"+ string(abs(rev), "%9.0fc")   if rev<0
			
		global labs "mlabs(small) msym(none) mlabc("$tc")"
		
		
		* Store labor and oh costs for exam stage 
		qui summ costs_labor if xaxis<1
		local lab_cost_exam = "$"+ string(r(mean), "%9.0fc")
		
		qui summ costs_oh if xaxis<1
		local lab_oh_exam = "$"+ string(r(mean), "%9.0fc")
		
		* Text for slides
		if "${version}" == "paper" {
			global text = ""
		}
		
		else if "${version}" == "slides" {
			/*
			# delimit ;
			global text = "text(`y' 3 "R/C = `rc'", size(4.5) placement(e)) ///
			text(550 1.5 "Wages x hours =" "`lab_cost_exam'", size(2.5) placement(e)) ///
			text(2700 1.5 "Additional costs =" "`lab_oh_exam'", size(2.5) placement(e)) /// "
			;
			#delimit cr
			*/
		}
		
		global oh_color = "240 128 128"
			
			* Generate chart 
		tw  (bar cost xaxis if inrange(xaxis, 4, 6), barwidth(0.6)  lcolor(%0) color(gs11))  ///
			(bar cost2 xaxis if inrange(xaxis, 0, 3), barwidth(0.6)  lcolor(cranberry) color("$oh_color"))  ///
			(bar costs_labor2 xaxis if xaxis<1 | xaxis==2 | xaxis==3, barwidth(0.6)  lcolor("$oh_color") color("$oh_color")) ///
			(bar rev xaxis if xaxis>=`xaxis_total_rev', barwidth(0.6)  lcolor(%0) color(gs11))  ///
			(bar rev2 xaxis if xaxis<`xaxis_total_rev', barwidth(0.6)  lcolor(%0) color(sea))  ///
			(bar oh_m3 xaxis, barwidth(0.6)  lcolor(cranberry) color("$oh_color"))  ///
			(bar oh_m2 xaxis, barwidth(0.6)  lcolor(cranberry) color("$oh_color"))  ///
			(bar oh_m1 xaxis, barwidth(0.6)  lcolor(cranberry) color("$oh_color"))  ///
			///(bar rev_coll4 xaxis, barwidth(0.6)  lcolor(sea) color(sky))  ///
			///(bar rev_coll3 xaxis, barwidth(0.6)  lcolor(sea) color(sky))  ///
			///(bar rev_coll2 xaxis, barwidth(0.6)  lcolor(sea) color(sky))  ///
			///(bar rev_coll1 xaxis, barwidth(0.6)  lcolor(sea) color(sky))  ///
			(bar costs_labor2 xaxis, barwidth(0.6)  lcolor(cranberry) color(cranberry))  ///
			(bar cost2 xaxis if xaxis==4, barwidth(0.6)  lcolor(cranberry) color(cranberry))  ///
			(bar white_out xaxis if inrange(xaxis, `white_out_first', `white_out_last') | ///
				inrange(xaxis, `white_out_first_cost', `white_out_last_cost'), ///
				color(white) lcolor(white) lw(thin) barwidth(0.65)) ///
			(scatter cost xaxis if xaxis>=`xaxis_total_cost' & xaxis<`max_obs', mlab(label_cost) $labs mlabpos(12)) ///
			(scatter cost2 xaxis if xaxis<`xaxis_total_cost', mlab(label_cost) $labs mlabpos(12)) ///
			(scatter rev xaxis if xaxis>=`xaxis_total_rev', mlab(label_rev) mlabs(small) msym(none) mlabc("$tc") mlabpos(12)) ///
			(scatter rev2 xaxis if inrange(xaxis, 6, 10), mlab(label_rev) mlabs(small) msym(none) mlabc("$tc") mlabpos(12)) ///
			(scatter white_out xaxis if xaxis==11, mlab(label_rev) mlabs(small) msym(none) mlabc("$tc") mlabpos(6)) ///
			///(pcarrowi `y' 2.4 `y' 2.9, color(black) ) ///
			, ///
			text(`costs_labor' 1.4 "Wages x hours" "= $`costs_labor_lab'", size(2) place(e)) ///
			text(`oh_m1' 1.4 "Labor/fringe/primary" "= $`oh_m1_lab'", size(2) place(e)) ///
			text(`oh_m2' 1.4 "Organization-wide costs"  "= $`oh_m2_lab'", size(2) place(e)) ///
			text(`oh_m3' 1.4 "Overhead/HQ costs"  "= $`oh_m3_lab'", size(2) place(e)) ///
			///text(`rev_coll1' 9.4 "ACS = $`rev_coll1_lab'", size(2) place(e)) ///
			///text(`rev_coll2' 9.4 "Queue = $`rev_coll2_lab'", size(2) place(e)) ///
			///text(`rev_coll3' 9.4 "Field = $`rev_coll3_lab'", size(2) place(e)) ///
			///text(`rev_coll4' 9.4 "Notices (1-4) = $`rev_coll4_lab'", size(2) place(e)) ///
			ylabel(`lb'(`int')`ub', nogrid format(%9.0fc) ) yscale(r(`lb_f' `ub_f') ) ///
			ytitle("`ylab'", size(4.5)) xtitle("")   ///
			legend(off) ///
			text(`y' 2.25 "R/C = `rc'", size(4) placement(e)) ///
			${title} ${axis_size} ///
			xlabel( ///
				0.99 `"Exams"' ///
				2 `""Appeals" """' ///
				3 `"Collections"' ///
				4 `"Projected"' ///
				5 `""{bf:Total}" "{bf:costs}""' ///
				6 `" "' ///
				7 `"Exams"' ///
				8 `"Appeals"' ///
				9 `"Collections"' ///
				10 `"Projected"' ///
				11 `""Time" "discount""' ///
				12 `""{bf:Total}" "{bf:revenue}""' ///
				, labsize(2) notick nogrid )  
			graph export "C:\Users\chh7970\Opportunity Insights Dropbox\Charlie Hutchinson\MVPF-Tax-Audits\Paper\figures\label\waterfall_by_tpi\`fig_name'_`hetlab'_`tpibin'.wmf", as(wmf) replace				
			///graph export "`graphpath'/waterfall_by_tpi/`fig_name'_`hetlab'_`tpibin'.png", as(png) replace///	
			restore
			}
				
		local j=`j'+1
	}		

* Export data used in figure 1	
*/
preserve 
	if `useloop' == 1{
		keep if exam_type_pooled == 1
	}

	if `useloop' == 2{
		keep if exam_type_pooled == 3
	}

	#delimit ;

	* added tpi_pctl to keep list;	
	keep costs_exam costs_appl costs_coll costs_future costs_tot_future 
		labor_costs_exam${step} labor_costs_appl${step} labor_costs_cnsl${step} labor_costs_coll 
		costs_notices costs_acs 
		rev_exam rev_cnsl rev_appl rev_coll rev_future discount rev_tot_disc
		tpi_pctl
	;

	#delimit cr	

	*** Modifications to data to match the graph
	gen rev_appl_tot = rev_appl + rev_cnsl
	gen costs_exam_labor_fp = labor_costs_exam${step}*`m1'
	gen costs_exam_org_wide = labor_costs_exam${step}*`m2'
	gen costs_exam_oh = labor_costs_exam${step}*`m3'
	gen labor_costs_appl = labor_costs_appl${step} + labor_costs_cnsl${step}
	gen oh_costs_appl = costs_appl - labor_costs_appl
	gen oh_costs_coll = costs_coll - labor_costs_coll

	* added tpi_pctl to order
	order tpi_pctl costs_exam labor_costs_exam${step} costs_exam_labor_fp costs_exam_org_wide costs_exam_oh costs_appl labor_costs_appl oh_costs_appl costs_coll labor_costs_coll oh_costs_coll costs_future costs_tot_future rev_exam rev_appl_tot rev_coll rev_future discount rev_tot_disc

	drop rev_appl rev_cnsl labor_costs_appl${step} labor_costs_cnsl${step} costs_notices costs_acs
		
	if `useloop' == 1{	
		export delimited "${mvpf_audits}/data/figures/figure_1_inc.csv", replace  // XXX
	}

	if `useloop' == 2{
		export delimited "${mvpf_audits}/data/figures/figure_a2.csv", replace
	}
restore
